We begin by loading the packages and sourcing the functions that we will need.
# Data visualisation
library(ggplot2)
library(ggpubr)      # ggarrange(): multi-panel figures
library(scales)
library(ggnewscale)  # new_scale_fill(): used below for a second fill scale
theme_set(theme_minimal(base_size = 13))
# Modelling packages
library(unmarked)    # occu(): BP occupancy models
# Data management
library(tidyverse)
library(lubridate)
# Source all custom functions (provides at least myDT(), used below)
source("./utils/comparison_of_occupancy_models.R")
# Locale: force English messages
Sys.setenv(LANG = "en_US.UTF-8")
Sys.setenv("LANGUAGE" = "EN")
The data were presented in Gimenez et al. (2022) and are available in the git repository associated with this publication. In this comparison, we only use data from the Ain county.
We first download and load the data set.
# Create the local data directory if needed
if (!dir.exists("./data/")) {
  dir.create("./data/")
}
# Download the data from the Ain county (only once).
# mode = "wb" makes the binary transfer explicit: .RData is a binary
# format and a text-mode download would corrupt it on Windows.
if (!file.exists("./data/metadata_Ain.RData")) {
  download.file(
    "https://github.com/oliviergimenez/computo-deeplearning-occupany-lynx/raw/master/dat/metadata_Ain.RData",
    "./data/metadata_Ain.RData",
    mode = "wb"
  )
}
# Load data from Ain: creates the `allfiles` object in the global environment
load('./data/metadata_Ain.RData')
# Build the picture table from the raw `allfiles` data frame:
# - Keywords: the manual tags, as character
# - DateTime: parsed capture time. "2019:02:29" is an impossible date
#   (February had only 28 days in 2019) found in 4 rows that are not lynx
#   detections; it is replaced by the most likely intended date
#   "2019:03:01" so that ymd_hms() does not produce parsing failures.
# - SiteID: everything before the first "_" of the file name
allpic <- allfiles %>%
  mutate(
    Keywords = as.character(observed),
    DateTime = ymd_hms(str_replace(DateTimeOriginal, "2019:02:29", "2019:03:01")),
    FileName = pix,
    SiteID = gsub("_.*$", "", pix)
  ) %>%
  select(SiteID, FileName, Keywords, DateTime)
rm(allfiles)
We will only focus on the lynx here. But we’ll still look a bit more into our data set.
## We list all the labels that are used.
## A Keywords entry is either a single bare label (no double quotes) or a
## string containing several quoted labels; the quoted ones are extracted
## with gregexpr()/regmatches() and stripped of their quotes.
## Wrapping the assignment in ( ) prints the result.
(all_labels <- sort(unique(c(
allpic$Keywords[!grepl('"', allpic$Keywords)], unname(unlist(sapply(unique(allpic$Keywords), function(x) {
gsub('"', '', regmatches(x, gregexpr('"([^"]*)"', x))[[1]])
})))
))))
[1] "blaireaux" "cavalier" "cerf" "chamois" "chasseur" "chat"
[7] "chevreuil" "chien" "ecureuil" "fouine" "humain" "lievre"
[13] "lynx" "martre" "oiseaux" "renard" "sangliers" "vaches"
[19] "vehicule"
# Grouping some labels
human_labels = c("cavalier", "chasseur", "chien", "Fréquentation humaine", "humain", "vehicule", "véhicule")
squirrel_labels = c("ecureuil", "écureuil", "Sciuridae")
small_mustelids_labels = c("fouine", "martre")
lagomorph_labels = c("laporidés", "lievre", "lièvre")
## All the sites with their deployment period and, per species/category,
## the number of pictures whose Keywords match the corresponding label(s).
## NOTE(review): "chat forestier" does not appear in the label list printed
## above (which only contains "chat"), so NbWildcat may always be 0 —
## confirm against the raw Keywords.
allsites <- allpic %>%
  mutate(
    # grepl() is vectorised over Keywords (one string per picture), so the
    # original sapply(..., any(grepl(...))) wrappers were unnecessary: the
    # alternation pattern yields the same TRUE/FALSE element-wise.
    NbLynx = grepl("lynx", Keywords),
    NbHuman = grepl(paste(human_labels, collapse = "|"), Keywords),
    NbBadger = grepl("blaireaux", Keywords),
    NbRedDeer = grepl("cerf", Keywords),
    NbChamois = grepl("chamois", Keywords),
    NbRoeDeer = grepl("chevreuil", Keywords),
    NbSquirrel = grepl(paste(squirrel_labels, collapse = "|"), Keywords),
    NbSmallMustelid = grepl(paste(small_mustelids_labels, collapse = "|"), Keywords),
    NbLagomorph = grepl(paste(lagomorph_labels, collapse = "|"), Keywords),
    NbFox = grepl("renard", Keywords),
    NbWildBoar = grepl("sangliers", Keywords),
    NbWildcat = grepl("chat forestier", Keywords)
  ) %>%
  group_by(SiteID) %>%
  dplyr::summarise(
    FirstPic = min(DateTime),
    LastPic = max(DateTime),
    # One matching picture = one "detection event": logicals are summed
    across(starts_with("Nb"), ~ sum(.x)),
    # kept as in the original code: the result stays grouped by SiteID
    .groups = "keep"
  ) %>%
  mutate(DeploymentDuration = LastPic - FirstPic) %>%
  relocate(DeploymentDuration, .before = FirstPic)
# Display the per-site summary table (myDT() comes from the sourced utils
# file). Character columns become factors; numeric columns (including the
# deployment duration) are rounded for readability.
allsites %>%
  mutate(
    DeploymentDuration = round(DeploymentDuration, 2),
    across(where(is.character), as.factor),
    across(where(is.numeric), \(x) round(x, 2))
  ) %>%
  myDT(caption = "Monitoring periods and detections per site", pageLength = 11)
Let’s see how many detections there are per site and per species.
# Histogram of the number of images per site, one facet per species.
allsites %>%
tidyr::pivot_longer(cols = starts_with("Nb"),
names_to = c("Species"),
names_prefix = "Nb",
values_to = "NbDetections") %>%
dplyr::group_by(Species) %>%
# Facet label: insert spaces in the CamelCase name and lower-case the
# second word ("RedDeer" -> "Red deer"), then append how many sites have
# zero detections of this species.
dplyr::mutate(SpeciesTxt = paste0(
str_replace(gsub("(?<!^)(?=[A-Z])", " ", Species, perl=TRUE), "\\s(\\w+)", function(x) {tolower(x)}),
"\n",
sum(NbDetections == 0), "/", length(unique(allsites$SiteID)),
" site", ifelse(sum(NbDetections == 0) > 1, "s", ""),
" with no detection"
)) %>%
ggplot() +
# Bars coloured by whether the site had any detection at all
geom_histogram(aes(x = NbDetections, y = after_stat(count), fill = NbDetections > 0), binwidth = 1) +
geom_density(aes(x = NbDetections, y = after_stat(count))) +
facet_wrap(SpeciesTxt ~ ., scales = "free") +
labs(x = "Number of images with the species",
y = "Number of sites")+
theme(legend.position = "none")
For the rest of the analysis, we will only keep the data for the lynx.
# Keep only the pictures whose tags mention the lynx.
LynxPic <- allpic %>%
  filter(grepl("lynx", Keywords))
For the BP and COP models, we have to discretise data into a matrix per session of detection/non-detection (BP) or counts (COP). We’ll use the same discretisation intervals as in the simulation study: by month, week and day.
# Beginning and end of the overall monitoring period, used to anchor the
# three discretisation grids below.
BeginDateTime <- min(allsites$FirstPic)
EndDateTime <- max(allsites$LastPic)
# Month sessions: sequence of month starts covering the whole period
MonthSessions <- seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "months"
)
# Session labels such as "Oct 17".
# NOTE: format.POSIXct() has no `locale` argument — the original
# `locale = "en_GB"` was silently swallowed by `...` — so month names
# follow the session's LC_TIME locale.
MonthSessionsLabels <- format(MonthSessions, "%b %y")
# Assign each lynx picture to its monthly session; cut() on date-times uses
# left-closed intervals [break_i, break_{i+1}), hence one fewer label than
# breaks (the last break only closes the final interval).
LynxPic$MonthSession <- cut(
  LynxPic$DateTime,
  breaks = MonthSessions,
  labels = MonthSessionsLabels[-length(MonthSessionsLabels)],
  include.lowest = FALSE
)
# Week sessions: weekly breaks anchored on the same month-start grid
WeekSessions <- seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "week"
)
# "%U" numbers weeks 00-53 with Sunday as the first day.
# NOTE: `locale` is not an argument of format.POSIXct(); the original
# `locale = "en_GB"` was silently ignored (harmless here, as
# "%Y week %U" is locale-independent).
WeekSessionsLabels <- format(WeekSessions, "%Y week %U")
# Assign each lynx picture to its weekly session (left-closed intervals)
LynxPic$WeekSession <- cut(
  LynxPic$DateTime,
  breaks = WeekSessions,
  labels = WeekSessionsLabels[-length(WeekSessionsLabels)],
  include.lowest = FALSE
)
# Day sessions: daily breaks over the same overall period
DaySessions <- seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "day"
)
# NOTE: format.POSIXct() has no `locale` argument — the original
# `locale = "en_GB"` was silently ignored — so month names follow the
# session's LC_TIME locale.
DaySessionsLabels <- format(DaySessions, "%d %B %Y")
# Assign each lynx picture to its daily session (left-closed intervals)
LynxPic$DaySession <- cut(
  LynxPic$DateTime,
  breaks = DaySessions,
  labels = DaySessionsLabels[-length(DaySessionsLabels)],
  include.lowest = FALSE
)
# Each lynx photo now carries its monthly, weekly and daily session.
print(select(LynxPic, -FileName, -Keywords))
# A tibble: 203 x 5
SiteID DateTime MonthSession WeekSession DaySession
<chr> <dttm> <fct> <fct> <fct>
1 15.1 2017-10-01 07:17:59 Oct 17 2017 week 40 01 October 2017
2 15.2 2018-07-23 08:00:27 Jul 18 2018 week 29 23 July 2018
3 15.2 2018-07-23 08:00:32 Jul 18 2018 week 29 23 July 2018
4 15.2 2018-10-18 11:20:11 Oct 18 2018 week 41 18 October 2018
5 15.2 2018-07-23 08:00:29 Jul 18 2018 week 29 23 July 2018
6 15.2 2018-07-24 11:26:00 Jul 18 2018 week 29 24 July 2018
7 15.2 2018-07-25 16:03:00 Jul 18 2018 week 29 25 July 2018
8 15.2 2018-07-23 08:00:36 Jul 18 2018 week 29 23 July 2018
9 15.2 2018-07-25 19:25:16 Jul 18 2018 week 29 25 July 2018
10 15.2 2018-08-19 08:11:40 Aug 18 2018 week 33 19 August 2018
# i 193 more rows
We will reformat the data into lists, in the same format that we used for the simulation study, in order to reuse the same functions.
# Number of sites
NbSites <- nrow(allsites)
# detection_times[[site i]][[deployment j]] -> numeric vector of detection
# times, in days since the start of deployment j at site i. There is a
# single deployment per site here, so only [[1]] is ever filled.
# list_T_ij[[site i]] -> vector of the R_i deployment duration(s), in days.
detection_times <- vector(mode = "list", length = NbSites)
names(detection_times) <- allsites$SiteID
list_T_ij <- vector(mode = "list", length = NbSites)
names(list_T_ij) <- allsites$SiteID
# seq_len() rather than 1:NbSites (safe even for an empty table)
for (i in seq_len(NbSites)) {
  i_siteID <- names(detection_times)[i]
  # Deployment window of this site; direct subsetting avoids re-running a
  # dplyr filter() pipeline on every iteration
  i_BeginTime <- allsites$FirstPic[allsites$SiteID == i_siteID]
  i_EndTime <- allsites$LastPic[allsites$SiteID == i_siteID]
  # Date-times of the lynx detections at this site, chronologically sorted
  i_DetecDateTime <- sort(LynxPic$DateTime[LynxPic$SiteID == i_siteID])
  # Days elapsed between the start of the deployment and each detection
  i_DetecTimeDays <- as.numeric(
    difftime(i_DetecDateTime, i_BeginTime, units = "days")
  )
  detection_times[[i]] <- list("1" = i_DetecTimeDays)
  # Duration of the (single) deployment, in days as well
  list_T_ij[[i]] <- as.numeric(
    difftime(i_EndTime, i_BeginTime, units = "days")
  )
}
# list_R_i: number of deployments per site (always 1 here)
list_R_i <- rep(1, NbSites)
Our continuous data now is formatted in lists.
detection_times is the time of detection in days since the
deployment began, which is, for the first three sites:
print(detection_times[1:3]) # Time of detection in days since the deployment began
$`15.1`
$`15.1`$`1`
[1] 61.79045
$`15.2`
$`15.2`$`1`
[1] 101.6343 109.1934 228.1474 285.2514 292.5099 305.3569 330.4648 396.7233
[9] 396.7234 396.7234 396.7234 397.0485 397.2232 397.5470 397.8661 398.3658
[17] 398.9069 399.0584 399.1989 399.5581 399.6976 423.7311 428.4760 464.9331
[25] 483.8620 539.2772 549.5200 569.6709 572.1523 575.1036 595.1144 596.7564
[33] 620.2774 625.4375
$`22.1`
$`22.1`$`1`
[1] 2.347917 9.562002 126.644248 160.734745 165.742106 165.742106 165.779595
And list_T_ij stores the deployment duration in days,
which is, for the first three sites:
print(list_T_ij[1:3]) # Deployment duration in days
$`15.1`
[1] 183.5267
$`15.2`
[1] 656.9314
$`22.1`
[1] 184.0091
We can now visualise the detection history of the lynx as well as the monitoring period per site.
# For the paper, we rename sites: the part of SiteID before the "." is
# mapped to a letter (A, B, C, ...) ordered by the total number of lynx
# detections; the part after the "." is kept unchanged.
SiteID_2_SiteIDNew = left_join(
(
# Every site, with SiteID split into its left/right parts
allsites %>%
tidyr::separate(
SiteID,
into = c("SiteIDleft", "SiteIDright"),
sep = "\\.",
remove = FALSE
)
),
(
# One row per left part, with its new letter code
allsites %>%
tidyr::separate(
SiteID,
into = c("SiteIDleft", "SiteIDright"),
sep = "\\.",
remove = FALSE
) %>%
dplyr::group_by(SiteIDleft) %>%
dplyr::summarise(NbLynxTot = sum(NbLynx)) %>%
dplyr::arrange(NbLynxTot) %>%
# `.` is the magrittr placeholder for this pipe stage, i.e. the
# summarised/arranged table: one letter per distinct left part
dplyr::mutate(SiteIDleftNew = LETTERS[1:nrow(.)])
),
by = "SiteIDleft"
) %>%
mutate(SiteIDNew = paste0(SiteIDleftNew, SiteIDright)) %>%
select(SiteID, SiteIDNew)
# Continuous detection history: one horizontal grey segment per site for
# the monitoring period, with a cross at each lynx picture.
LynxDetecHistory_continuous = ggplot(data = (
allsites %>%
left_join(LynxPic, by = "SiteID") %>%
left_join(SiteID_2_SiteIDNew, by = "SiteID")
),
aes(x = DateTime, y = SiteIDNew)) +
geom_segment(aes(
x = FirstPic,
xend = LastPic,
yend = SiteIDNew,
color = "Monitoring period"
), linewidth = 2.5) +
# shape = 4 is a cross; the fill aesthetic is only used to get a legend key
geom_point(aes(fill = "Detection event"), shape = 4) +
scale_x_datetime(date_breaks = "3 month" , date_labels = "%b %y") +
scale_colour_manual(values = c("Monitoring period" = "grey"), name = "") +
scale_fill_manual(values = c("Detection event" = "black"), name = "") +
labs(title = "Lynx detection history") +
theme(
axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 0.3
),
axis.title = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom"
)
print(LynxDetecHistory_continuous)
# Export: create the output directory once, then save a title-less PDF.
# `units` is spelled out — the original `unit =` only worked through R's
# partial argument matching.
if (!dir.exists('./output/')) {
  dir.create('./output/')
}
ggsave(
  "./output/lynx_detection_history_continuous.pdf",
  plot = (
    LynxDetecHistory_continuous +
      theme(plot.title = element_blank())
  ),
  width = 23,
  height = 10,
  units = "cm"
)
# JPEG version of the same title-less plot (jpeg()/print()/dev.off()).
# `units` spelled out (the original `unit =` relied on partial matching).
jpeg(
  filename = "./output/lynx_detection_history_continuous.jpeg",
  width = 23,
  height = 10,
  units = "cm",
  res = 100
)
print(
  LynxDetecHistory_continuous +
    theme(plot.title = element_blank())
)
dev.off()
png
2
# Lookup table: one row per daily session with its label, begin and end.
# cut(DaySessions, breaks = DaySessions) labels each break with the
# left-closed interval it starts; the trailing element (last break) is
# dropped with [-length(DaySessions)].
# NOTE(review): seconds(1e-16) is far below POSIXct precision, so
# DaySessionEnd is effectively equal to the *next* session's begin —
# unlike the weekly/monthly tables below, which subtract days(1).
# Confirm this asymmetry is intended.
DayTable = as_tibble(data.frame("DaySession" = cut(
DaySessions,
breaks = DaySessions,
labels = DaySessionsLabels[-length(DaySessionsLabels)],
include.lowest = FALSE
)[-length(DaySessions)],
"DaySessionBegin" = DaySessions[-length(DaySessions)] + seconds(0),
"DaySessionEnd" = DaySessions[-1] - seconds(1e-16)
))
# Long table: one row per (site, daily session).
DayLongDF <- expand.grid(
"SiteID" = unique(allsites$SiteID),
"DaySession" = DayTable$DaySession) %>%
as_tibble() %>%
left_join(allsites,
by = "SiteID") %>%
left_join(DayTable, by = "DaySession") %>%
mutate(
# Time actually monitored within the session: overlap between the
# session interval and the deployment interval (no overlap -> NA -> 0)
MonitoringTime = replace_na(as.duration(
lubridate::intersect(
lubridate::interval(DaySessionBegin, DaySessionEnd),
lubridate::interval(FirstPic, LastPic)
)
), as.duration(seconds(0))),
# Session entirely covered by the deployment
FullyMonitored = (FirstPic <= DaySessionBegin &
LastPic >= DaySessionEnd),
# Monitored during part of the session only (time later reported as
# discarded by the discretisation)
PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
(MonitoringTime < max(MonitoringTime, na.rm = T))
) %>%
left_join((
# Number of lynx pictures per (site, session)
LynxPic %>%
group_by(SiteID, DaySession) %>%
dplyr::summarise(n = n(), .groups = "keep")
),
by = c("SiteID", "DaySession")) %>%
# Counts are only defined for fully monitored sessions; otherwise NA
mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))
# Site x daily-session matrix of counts (NA = session not fully monitored)
DayCountMatrix <- DayLongDF %>%
  select(SiteID, DaySession, NbDetec) %>%
  pivot_wider(names_from = DaySession, values_from = NbDetec) %>%
  column_to_rownames("SiteID")
# Daily detection history heatmap.
# First layer: positive counts on a green gradient (0 mapped to NA so the
# cell is transparent and handled by the second layer).
LynxDetecHistory_day = ggplot() +
geom_tile(
data = DayLongDF,
aes(
x = as.Date(DaySessionBegin),
y = SiteID,
fill = ifelse(NbDetec == 0, NA, NbDetec)
)
) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detections",
breaks = round(seq(
from = 1,
to = max(DayLongDF$NbDetec, na.rm = T),
length.out = 4
)),
na.value = "transparent"
) +
# ggnewscale: allows a second, discrete fill scale in the same plot
new_scale_fill() +
# Second layer: zero-count (red) and unmonitored (white) cells
geom_tile(
data = (
DayLongDF %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(
is.na(NbDetec), "Not monitored", "No detection"
))
),
aes(
x = as.Date(DaySessionBegin),
y = SiteID,
fill = NbDetec0NA
)
) +
scale_fill_manual(
values = c(
"No detection" = "#ba3c3c",
"Not monitored" = "white"
),
name = ""
) +
scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
labs(title = "Lynx daily detection history") +
theme(
axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
print(LynxDetecHistory_day)
# Per site: total monitored time falling in partially-monitored daily
# sessions, i.e. time discarded by the daily discretisation.
DayLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with daily sessions", pageLength = 11)
# For each weekly session, we check if each site was monitored during the
# entire session ("FullyMonitored = TRUE") or not ("FullyMonitored = FALSE").
# If the site was not monitored during the entire session, the observation
# will be NA.
# Lookup table: one row per weekly session with its label, begin and end.
# NOTE(review): WeekSessionEnd = next week's start minus one day, i.e.
# 00:00 on the session's 7th day — the monitoring-time interval below
# therefore spans 6 of the 7 days. Confirm this is intended.
WeekTable = data.frame("WeekSession" = cut(
WeekSessions,
breaks = WeekSessions,
labels = WeekSessionsLabels[-length(WeekSessionsLabels)],
include.lowest = FALSE
)[-length(WeekSessions)],
"WeekSessionBegin" = WeekSessions[-length(WeekSessions)],
"WeekSessionEnd" = WeekSessions[-1] - days(1)
)
# Long table: one row per (site, weekly session).
WeekLongDF <- expand.grid(
"SiteID" = unique(allsites$SiteID),
"WeekSession" = WeekTable$WeekSession) %>%
as_tibble() %>%
left_join(allsites,
by = "SiteID") %>%
left_join(WeekTable, by = "WeekSession") %>%
mutate(
# Overlap between the session interval and the deployment (NA -> 0)
MonitoringTime = replace_na(as.duration(
lubridate::intersect(
lubridate::interval(WeekSessionBegin, WeekSessionEnd),
lubridate::interval(FirstPic, LastPic)
)
), as.duration(seconds(0))),
# Session entirely covered by the deployment
FullyMonitored = (FirstPic <= WeekSessionBegin &
LastPic >= WeekSessionEnd),
# Monitored during part of the session only
PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
(MonitoringTime < max(MonitoringTime, na.rm = T))
) %>%
left_join((
# Number of lynx pictures per (site, session)
LynxPic %>%
group_by(SiteID, WeekSession) %>%
dplyr::summarise(n = n(), .groups = "keep")
),
by = c("SiteID", "WeekSession")) %>%
# Counts are only defined for fully monitored sessions; otherwise NA
mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))
# Site x weekly-session matrix of counts (NA = session not fully monitored)
WeekCountMatrix <- WeekLongDF %>%
  select(SiteID, WeekSession, NbDetec) %>%
  pivot_wider(names_from = WeekSession, values_from = NbDetec) %>%
  column_to_rownames("SiteID")
# Weekly detection history heatmap (same layering as the daily plot:
# green gradient for positive counts, then a second discrete fill scale
# for zero-count / unmonitored cells via ggnewscale).
LynxDetecHistory_week = ggplot() +
geom_tile(
data = WeekLongDF,
aes(
x = as.Date(WeekSessionBegin),
y = SiteID,
fill = ifelse(NbDetec == 0, NA, NbDetec)
), colour = "grey"
) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detections",
breaks = round(seq(
from = 1,
to = max(WeekLongDF$NbDetec, na.rm = T),
length.out = 4
)),
na.value = "transparent"
) +
new_scale_fill() +
geom_tile(
data = (
WeekLongDF %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(
is.na(NbDetec), "Not monitored", "No detection"
))
),
aes(
x = as.Date(WeekSessionBegin),
y = SiteID,
fill = NbDetec0NA
), colour = "grey"
) +
scale_fill_manual(
values = c(
"No detection" = "#ba3c3c",
"Not monitored" = "white"
),
name = ""
) +
scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
labs(title="Lynx weekly detection history")+
theme(
axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
print(LynxDetecHistory_week)
# Per site: total monitored time falling in partially-monitored weekly
# sessions, i.e. time discarded by the weekly discretisation.
WeekLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with weekly sessions", pageLength = 11)
# For each monthly session, we check if each site was monitored during the
# entire session ("FullyMonitored = TRUE") or not ("FullyMonitored = FALSE").
# If the site was not monitored during the entire session, the observation
# will be NA.
# Lookup table: one row per monthly session with its label, begin and end.
# NOTE(review): MonthSessionEnd = next month's start minus one day, i.e.
# 00:00 on the session's last day, so the monitoring-time interval below
# excludes that final day. Confirm this is intended.
MonthTable = data.frame("MonthSession" = cut(
MonthSessions,
breaks = MonthSessions,
labels = MonthSessionsLabels[-length(MonthSessionsLabels)],
include.lowest = FALSE
)[-length(MonthSessions)],
"MonthSessionBegin" = MonthSessions[-length(MonthSessions)],
"MonthSessionEnd" = MonthSessions[-1] - days(1)
)
# Long table: one row per (site, monthly session).
MonthLongDF <- expand.grid(
"SiteID" = unique(allsites$SiteID),
"MonthSession" = MonthTable$MonthSession) %>%
as_tibble() %>%
left_join(allsites,
by = "SiteID") %>%
left_join(MonthTable, by = "MonthSession") %>%
mutate(
# Overlap between the session interval and the deployment (NA -> 0)
MonitoringTime = replace_na(as.duration(
lubridate::intersect(
lubridate::interval(MonthSessionBegin, MonthSessionEnd),
lubridate::interval(FirstPic, LastPic)
)
), as.duration(seconds(0))),
# Session entirely covered by the deployment
FullyMonitored = (FirstPic <= MonthSessionBegin &
LastPic >= MonthSessionEnd),
# Monitored during part of the session only
PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
(MonitoringTime < max(MonitoringTime, na.rm = T))
) %>%
left_join((
# Number of lynx pictures per (site, session)
LynxPic %>%
group_by(SiteID, MonthSession) %>%
dplyr::summarise(n = n(), .groups = "keep")
),
by = c("SiteID", "MonthSession")) %>%
# Counts are only defined for fully monitored sessions; otherwise NA
mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))
# Site x monthly-session matrix of counts (NA = session not fully monitored)
MonthCountMatrix <- MonthLongDF %>%
  select(SiteID, MonthSession, NbDetec) %>%
  pivot_wider(names_from = MonthSession, values_from = NbDetec) %>%
  column_to_rownames("SiteID")
# Monthly detection history heatmap (same layering as the daily/weekly
# plots: green gradient for positive counts, then a second discrete fill
# scale for zero-count / unmonitored cells via ggnewscale).
LynxDetecHistory_month = ggplot() +
geom_tile(
data = MonthLongDF,
aes(
x = as.Date(MonthSessionBegin),
y = SiteID,
fill = ifelse(NbDetec == 0, NA, NbDetec)
), colour = "grey"
) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detections",
breaks = round(seq(
from = 1,
to = max(MonthLongDF$NbDetec, na.rm = T),
length.out = 4
)),
na.value = "transparent"
) +
new_scale_fill() +
geom_tile(
data = (
MonthLongDF %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(
is.na(NbDetec), "Not monitored", "No detection"
))
),
aes(
x = as.Date(MonthSessionBegin),
y = SiteID,
fill = NbDetec0NA
), colour = "grey"
) +
scale_fill_manual(
values = c(
"No detection" = "#ba3c3c",
"Not monitored" = "white"
),
name = ""
) +
scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
labs(title = "Lynx monthly detection history") +
theme(
axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
print(LynxDetecHistory_month)
# Per site: total monitored time falling in partially-monitored monthly
# sessions, i.e. time discarded by the monthly discretisation.
MonthLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with monthly sessions", pageLength = 11)
# Common upper bound for the colour scales of the three discretisations,
# so the three panels of the paper figure can share one legend.
# TRUE spelled out (T is a reassignable variable, not a reserved word).
MaxNbDetec_AllDiscretisations <- max(DayLongDF$NbDetec,
                                     WeekLongDF$NbDetec,
                                     MonthLongDF$NbDetec,
                                     na.rm = TRUE)
# Month ----
MonthSessionsLabelsTrimester = MonthSessionsLabels
MonthSessionsLabelsTrimester[
sort(c(seq(from = 2, to = length(MonthSessionsLabels), by = 3),
seq(from = 3, to = length(MonthSessionsLabels), by = 3)))] <- ""
# Paper figure, panel (a): monthly discretisation with renamed sites.
# First layer: zero-count cells in red (" " keeps the legend key blank);
# unmonitored cells are NA -> transparent.
gg_Month = ggplot() +
geom_tile(
data = (
MonthLongDF %>%
left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, " "))
),
aes(
# reorder() keeps the discrete month labels in chronological order
x = reorder(as.factor(format(
as_date(MonthSessionBegin), "%b %y"
)),
as_date(MonthSessionBegin)),
y = SiteIDNew,
fill = NbDetec0NA
)
) +
scale_fill_manual(values = c(" " = "#ba3c3c"),
na.value = "transparent",
name = "No detection") +
guides(fill = guide_legend(
title.position = 'top',
title.hjust = 0,
title.vjust = -1
)) +
# ggnewscale: second fill scale for the positive counts
new_scale_fill() +
geom_tile(data = (MonthLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
aes(
x = reorder(as.factor(format(
as_date(MonthSessionBegin), "%b %y"
)),
as_date(MonthSessionBegin)),
y = SiteIDNew,
fill = ifelse(NbDetec == 0, NA, NbDetec)
)) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detection(s)",
breaks = round(seq(
from = 1,
to = MaxNbDetec_AllDiscretisations,
length.out = 4
)),
# shared limits so the three panels use the same colour scale
limits = c(1, MaxNbDetec_AllDiscretisations),
na.value = "transparent"
) +
guides(fill = guide_colourbar(
title.position = 'top',
title.hjust = 0,
title.vjust = -1,
barheight = .5
)) +
scale_x_discrete(breaks = MonthSessionsLabels, labels = MonthSessionsLabelsTrimester) +
labs(title = "(a) Monthly discretisation") +
theme( axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
plot.title = element_text(size = 12, hjust = .5),
panel.grid.major.x = element_blank(),
legend.position = "bottom",
legend.title = element_text(size = 12),
# NOTE(review): legend.title.align is deprecated in recent ggplot2
# releases — may emit a warning; confirm the installed version.
legend.title.align = 0.5
)
# Week ----
WeekSessionsLabelsTrimester = format(WeekSessions, "%b %y")
WeekSessionsLabelsTrimester[duplicated(WeekSessionsLabelsTrimester)] = ""
WeekSessionsLabelsTrimester[
!WeekSessionsLabelsTrimester %in% MonthSessionsLabelsTrimester
] = ""
# Paper figure, panel (b): weekly discretisation with renamed sites
# (same two-layer construction as gg_Month).
gg_Week = ggplot() +
geom_tile(
data = (
WeekLongDF %>%
left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, "No detection"))
),
aes(
x =reorder(as.factor(format(as_date(WeekSessionBegin), "%Y week %U")),
as_date(WeekSessionBegin)),
y = SiteIDNew,
fill = NbDetec0NA
)
) +
scale_fill_manual(
values = c("No detection" = "#ba3c3c"),
na.value = "transparent", name = ""
) +
new_scale_fill() +
geom_tile(data = (WeekLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
aes(
x = reorder(as.factor(format(as_date(WeekSessionBegin), "%Y week %U")),
as_date(WeekSessionBegin)),
y = SiteIDNew,
fill = ifelse(NbDetec == 0, NA, NbDetec)
)) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detections",
breaks = round(seq(
from = 1,
to = MaxNbDetec_AllDiscretisations,
length.out = 4
)),
# shared limits so the three panels use the same colour scale
limits = c(1, MaxNbDetec_AllDiscretisations),
na.value = "transparent"
) +
scale_x_discrete(breaks = WeekSessionsLabels, labels = WeekSessionsLabelsTrimester) +
labs(title = "(b) Weekly discretisation") +
theme( axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
plot.title = element_text(size = 12, hjust = .5),
panel.grid.major.x = element_blank(),
legend.position = "bottom"
)
# Day ----
DaySessionsLabelsTrimester = format(DaySessions, "%b %y")
DaySessionsLabelsTrimester[duplicated(DaySessionsLabelsTrimester)] = ""
DaySessionsLabelsTrimester[
!DaySessionsLabelsTrimester %in% MonthSessionsLabelsTrimester
] = ""
# Paper figure, panel (c): daily discretisation with renamed sites
# (same two-layer construction as gg_Month).
gg_Day = ggplot() +
geom_tile(
data = (
DayLongDF %>%
left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
filter(is.na(NbDetec) | NbDetec == 0) %>%
mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, "No detection"))
),
aes(
x =reorder(as.factor(format(as_date(DaySessionBegin), "%d %B %Y")),
as_date(DaySessionBegin)),
y = SiteIDNew,
fill = NbDetec0NA
)
) +
scale_fill_manual(
values = c("No detection" = "#ba3c3c"),
na.value = "transparent", name = ""
) +
new_scale_fill() +
geom_tile(data = (DayLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
aes(
x = reorder(as.factor(format(as_date(DaySessionBegin), "%d %B %Y")),
as_date(DaySessionBegin)),
y = SiteIDNew,
fill = ifelse(NbDetec == 0, NA, NbDetec)
)) +
scale_fill_gradient(
low = "#93c47d",
high = "#274e13",
name = "Detections",
breaks = round(seq(
from = 1,
to = MaxNbDetec_AllDiscretisations,
length.out = 4
)),
# shared limits so the three panels use the same colour scale
limits = c(1, MaxNbDetec_AllDiscretisations),
na.value = "transparent"
) +
scale_x_discrete(breaks = DaySessionsLabels, labels = DaySessionsLabelsTrimester) +
labs(title = "(c) Daily discretisation")+
theme(
axis.text.x = element_text(
angle = 90,
vjust = 0.5,
hjust = 1
),
axis.title = element_blank(),
plot.title = element_text(size = 12, hjust = .5),
panel.grid.major.x = element_blank(),
legend.position = "bottom"
)
# Discrete ----
# Stack the monthly, weekly and daily panels into a single figure.
# Only the bottom (daily) panel keeps x axis text; the legend is shared.
# TRUE spelled out (T is a reassignable variable, not a reserved word).
gg_Discrete <- ggarrange(
  gg_Month + theme(axis.text.x = element_blank()),
  gg_Week + theme(axis.text.x = element_blank()),
  gg_Day + theme(axis.text.x = element_text(angle = 0, hjust = .5)),
  nrow = 3,
  heights = c(.8, .8, 1),
  common.legend = TRUE,
  legend = "bottom"
)
print(gg_Discrete)
# Export the combined figure as PDF.
# `units` is spelled out (the original `unit =` relied on partial matching).
ggsave(
  "./output/lynx_detection_history_discrete.pdf",
  plot = gg_Discrete,
  width = 25,
  height = 18,
  units = "cm"
)
# JPEG version of the combined figure.
# `units` spelled out (the original `unit =` relied on partial matching).
jpeg(
  filename = "./output/lynx_detection_history_discrete.jpeg",
  width = 25,
  height = 18,
  units = "cm",
  res = 100
)
print(gg_Discrete)
dev.off()
png
2
We initialise the dataframe to store the result for all models.
ModelComparisonDF <- data.frame()
We fit the BP model with unmarked::occu. We create an unmarkedFrame object.
umf <- unmarkedFrameOccu(y = (as.matrix(MonthCountMatrix) > 1) * 1)
summary(umf)
unmarkedFrame Object
11 sites
Maximum number of observations per site: 29
Mean number of observations per site: 15.36
Sites with at least one detection: 8
Tabulation of y observations:
0 1 <NA>
118 51 150
We calculate intuitive starting points for likelihood maximisation.
For the occupancy probability, we use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727
For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.
# Naive detection probability: among the sites with at least one detection,
# the proportion of their (non-NA) sessions with a detection (printed)
(p_init <- mean(
getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0,] > 0,
na.rm = T
))
[1] 0.375
We run the model.
# Fit the intercept-only BP occupancy model on the monthly data.
# In unmarked::occu() the double formula reads ~ detection ~ occupancy;
# starting values are the naive estimates above, on the logit scale.
# beforetime/aftertime bracket the fit so the running time can be reported.
beforetime = Sys.time()
MonthOccuMod <- occu(formula = ~ 1 ~ 1,
data = umf,
method = "Nelder-Mead",
starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime = Sys.time()
print(MonthOccuMod)
Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init),
qlogis(p_init)), method = "Nelder-Mead")
Occupancy:
Estimate SE z P(>|z|)
1.11 0.742 1.49 0.136
Detection:
Estimate SE z P(>|z|)
-0.531 0.181 -2.94 0.00332
AIC: 196.3125
The model running time is:
And we look at the estimates.
# Estimates
backTransform(MonthOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)
Estimate SE LinComb (Intercept)
0.751 0.139 1.11 1
Transformation: logistic
backTransform(MonthOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)
Estimate SE LinComb (Intercept)
0.37 0.0421 -0.531 1
Transformation: logistic
# 95% Confidence intervals
plogis(confint(MonthOccuMod, type = 'state', method = 'normal'))
0.025 0.975
psi(Int) 0.4138047 0.9282099
plogis(confint(MonthOccuMod, type = 'det', method = 'normal'))
0.025 0.975
p(Int) 0.2922106 0.4559926
# 50% Confidence intervals
plogis(confint(MonthOccuMod, type = 'state', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.6468668 0.8328493
plogis(confint(MonthOccuMod, type = 'det', method = 'normal', level = 0.50))
0.25 0.75
p(Int) 0.3424411 0.3992181
For the comparison, we store those results in a dataframe.
# Store the monthly BP estimates for the final comparison.
# Each confidence interval is computed once and reused (the original code
# called plogis(confint(...)) twice per bound); [1]/[2] index the lower and
# upper bound of the 1x2 matrix returned by confint().
MonthOccu_psi_CI95 <- plogis(confint(MonthOccuMod, type = 'state', method = 'normal'))
MonthOccu_psi_CI50 <- plogis(confint(MonthOccuMod, type = 'state', method = 'normal', level = 0.50))
MonthOccu_p_CI95 <- plogis(confint(MonthOccuMod, type = 'det', method = 'normal'))
MonthOccu_p_CI50 <- plogis(confint(MonthOccuMod, type = 'det', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Month",
  # psi: occupancy probability (logit scale, then back-transformed)
  "psi_TransformedPointEstimate" = unname(coef(MonthOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(MonthOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(MonthOccuMod, type = "state")@estimate,
  "psi_CI95lower" = MonthOccu_psi_CI95[1],
  "psi_CI95upper" = MonthOccu_psi_CI95[2],
  "psi_CI50lower" = MonthOccu_psi_CI50[1],
  "psi_CI50upper" = MonthOccu_psi_CI50[2],
  # p: detection probability (logit scale, then back-transformed)
  "p_TransformedPointEstimate" = unname(coef(MonthOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(MonthOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(MonthOccuMod, type = "det")@estimate,
  "p_CI95lower" = MonthOccu_p_CI95[1],
  "p_CI95upper" = MonthOccu_p_CI95[2],
  "p_CI50lower" = MonthOccu_p_CI50[1],
  "p_CI50upper" = MonthOccu_p_CI50[2]
))
We run the exact same process. We again start by creating an unmarkedFrame object.
umf <- unmarkedFrameOccu(y = (as.matrix(WeekCountMatrix) > 1) * 1)
summary(umf)
unmarkedFrame Object
11 sites
Maximum number of observations per site: 125
Mean number of observations per site: 69.09
Sites with at least one detection: 8
Tabulation of y observations:
0 1 <NA>
711 49 615
We calculate intuitive starting points for likelihood maximisation, using the same calculations as before.
For the occupancy probability, we use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727
For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.
(p_init <- mean(
getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0,] > 0,
na.rm = T
))
[1] 0.08085809
We run the model.
# Fit the intercept-only BP occupancy model on the weekly data
# (~ detection ~ occupancy; starts on the logit scale; timed fit).
beforetime = Sys.time()
WeekOccuMod <- occu(formula = ~ 1 ~ 1,
data = umf,
method = "Nelder-Mead",
starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime = Sys.time()
print(WeekOccuMod)
Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init),
qlogis(p_init)), method = "Nelder-Mead")
Occupancy:
Estimate SE z P(>|z|)
1.19 0.786 1.52 0.129
Detection:
Estimate SE z P(>|z|)
-2.45 0.153 -16.1 4.95e-58
AIC: 356.3957
The model running time is:
And we look at the estimates.
# Estimates
backTransform(WeekOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)
Estimate SE LinComb (Intercept)
0.767 0.14 1.19 1
Transformation: logistic
backTransform(WeekOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)
Estimate SE LinComb (Intercept)
0.0792 0.0111 -2.45 1
Transformation: logistic
# 95% Confidence intervals
plogis(confint(WeekOccuMod, type = 'state', method = 'normal'))
0.025 0.975
psi(Int) 0.4137922 0.9390257
plogis(confint(WeekOccuMod, type = 'det', method = 'normal'))
0.025 0.975
p(Int) 0.05994302 0.1039822
# 50% Confidence intervals
plogis(confint(WeekOccuMod, type = 'state', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.6598463 0.8485753
plogis(confint(WeekOccuMod, type = 'det', method = 'normal', level = 0.50))
0.25 0.75
p(Int) 0.07201254 0.08705714
For the comparison, we store those results in a dataframe.
# Store the weekly BP estimates for the final comparison.
# Each confidence interval is computed once and reused (the original code
# called plogis(confint(...)) twice per bound).
WeekOccu_psi_CI95 <- plogis(confint(WeekOccuMod, type = 'state', method = 'normal'))
WeekOccu_psi_CI50 <- plogis(confint(WeekOccuMod, type = 'state', method = 'normal', level = 0.50))
WeekOccu_p_CI95 <- plogis(confint(WeekOccuMod, type = 'det', method = 'normal'))
WeekOccu_p_CI50 <- plogis(confint(WeekOccuMod, type = 'det', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Week",
  # psi: occupancy probability (logit scale, then back-transformed)
  "psi_TransformedPointEstimate" = unname(coef(WeekOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(WeekOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(WeekOccuMod, type = "state")@estimate,
  "psi_CI95lower" = WeekOccu_psi_CI95[1],
  "psi_CI95upper" = WeekOccu_psi_CI95[2],
  "psi_CI50lower" = WeekOccu_psi_CI50[1],
  "psi_CI50upper" = WeekOccu_psi_CI50[2],
  # p: detection probability (logit scale, then back-transformed)
  "p_TransformedPointEstimate" = unname(coef(WeekOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(WeekOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(WeekOccuMod, type = "det")@estimate,
  "p_CI95lower" = WeekOccu_p_CI95[1],
  "p_CI95upper" = WeekOccu_p_CI95[2],
  "p_CI50lower" = WeekOccu_p_CI50[1],
  "p_CI50upper" = WeekOccu_p_CI50[2]
))
We create an unmarkedFrame object.
# Binarise daily counts into detection / non-detection.
# BUG FIX: the original used "> 1", which treats a day with exactly one
# detection as a non-detection ("at least one detection" is "> 0").
# With "> 1" only 8 of the 9 sites with detections were counted as
# detected, and 110 single-detection days were dropped.
umf <- unmarkedFrameOccu(y = (as.matrix(DayCountMatrix) > 0) * 1)
summary(umf)
unmarkedFrame Object
11 sites
Maximum number of observations per site: 881
Mean number of observations per site: 489.64
Sites with at least one detection: 8
Tabulation of y observations:
0 1 <NA>
5349 37 4305
We calculate intuitive starting points for likelihood maximisation, using the same calculations as before.
For the occupancy probability, we use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727
For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.
# Starting value for detection probability: among sites with at least one
# detection, the proportion of (site x day) cells with a detection.
(p_init <- mean(
  getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0, ] > 0,
  na.rm = TRUE
))
[1] 0.008628731
We run the model.
# Fit the constant psi(.) p(.) BP occupancy model on the daily data,
# timing the optimisation.
beforetime <- Sys.time()
DayOccuMod <- occu(formula = ~ 1 ~ 1,  # detection ~ 1, occupancy ~ 1
                   data = umf,
                   method = "Nelder-Mead",
                   starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime <- Sys.time()
print(DayOccuMod)
Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init),
qlogis(p_init)), method = "Nelder-Mead")
Occupancy:
Estimate SE z P(>|z|)
1.39 0.895 1.55 0.121
Detection:
Estimate SE z P(>|z|)
-4.79 0.172 -27.9 3.2e-171
AIC: 440.5986
The model running time is:
And we look at the estimates.
# Estimates
backTransform(DayOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)
Estimate SE LinComb (Intercept)
0.8 0.143 1.39 1
Transformation: logistic
backTransform(DayOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)
Estimate SE LinComb (Intercept)
0.00828 0.00141 -4.79 1
Transformation: logistic
# 95% Confidence intervals
plogis(confint(DayOccuMod, type = 'state', method = 'normal'))
0.025 0.975
psi(Int) 0.4097286 0.9586336
plogis(confint(DayOccuMod, type = 'det', method = 'normal'))
0.025 0.975
p(Int) 0.005931632 0.0115549
# 50% Confidence intervals
plogis(confint(DayOccuMod, type = 'state', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.6868307 0.8800189
plogis(confint(DayOccuMod, type = 'det', method = 'normal', level = 0.50))
0.25 0.75
p(Int) 0.007384375 0.009289357
For the comparison, we store those results in a dataframe.
# Store the BP / daily-discretisation results for the model comparison.
# Each confidence interval is computed once and reused. Both psi and p use
# a logit link, so plogis() is the correct back-transformation here.
psi_ci95 <- plogis(confint(DayOccuMod, type = 'state', method = 'normal'))
psi_ci50 <- plogis(confint(DayOccuMod, type = 'state', method = 'normal', level = 0.50))
p_ci95 <- plogis(confint(DayOccuMod, type = 'det', method = 'normal'))
p_ci50 <- plogis(confint(DayOccuMod, type = 'det', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Day",
  # psi (occupancy probability)
  "psi_TransformedPointEstimate" = unname(coef(DayOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(DayOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(DayOccuMod, type = "state")@estimate,
  "psi_CI95lower" = psi_ci95[1],
  "psi_CI95upper" = psi_ci95[2],
  "psi_CI50lower" = psi_ci50[1],
  "psi_CI50upper" = psi_ci50[2],
  # p (detection probability)
  "p_TransformedPointEstimate" = unname(coef(DayOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(DayOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(DayOccuMod, type = "det")@estimate,
  "p_CI95lower" = p_ci95[1],
  "p_CI95upper" = p_ci95[2],
  "p_CI50lower" = p_ci50[1],
  "p_CI50upper" = p_ci50[2]
))
unmarked::occuCOP

We create an unmarkedFrame object. With COP, we supply L, the length of the observation periods — here the duration of the sessions — which makes it possible to account for months of different durations when estimating a daily detection rate.
To make this comparable to the simulation study, we dropped the data of incomplete sessions, like we did for the BP model. However, because we can stipulate to the model that different sessions have different durations, we probably would have had a better estimation if we kept all data.
# Build the COP unmarkedFrame for the monthly discretisation. L gives the
# length (in days) of each monthly session, so months of different
# durations are properly accounted for in the daily detection rate.
umf <- unmarkedFrameOccuCOP(
  y = as.matrix(MonthCountMatrix),
  L = matrix(
    # Each session duration is repeated once per site; matrix() fills
    # column-wise, so this assigns one duration per column (session).
    data = rep(days_in_month(MonthSessions[-length(MonthSessions)]),
               each = nrow(MonthCountMatrix)),
    nrow = nrow(MonthCountMatrix),
    ncol = ncol(MonthCountMatrix),
    dimnames = dimnames(MonthCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object
11 sites
Maximum number of sampling occasions per site: 29
Mean number of sampling occasions per site: 15.36
Sites with at least one detection: 9
Tabulation of y observations:
0 1 2 3 4 5 6 7 8 14 <NA>
90 28 31 3 8 1 4 1 2 1 150
Tabulation of sampling occasions length:
28 30 31
33 99 187
We calculate intuitive starting points for likelihood maximisation.
For the occupancy probability, we once again use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818
For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.
# Starting value for the daily detection rate: mean number of detections
# per day of effort, over sites with at least one detection.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = TRUE))
[1] 0.04573462
We run the model.
# Fit the constant psi(.) lambda(.) COP model on the monthly data,
# timing the optimisation.
beforetime <- Sys.time()
MonthOccuCOPMod <- occuCOP(
  data = umf,
  psiformula = ~ 1,
  lambdaformula = ~ 1,
  method = "Nelder-Mead",
  psistarts = qlogis(psi_init),     # occupancy start on the logit scale
  lambdastarts = log(lambda_init)   # detection rate start on the log scale
)
aftertime <- Sys.time()
print(MonthOccuCOPMod)
Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init),
lambdastarts = log(lambda_init), method = "Nelder-Mead")
Occupancy probability:
Estimate SE z P(>|z|)
1.5 0.782 1.92 0.0543
Detection rate:
Estimate SE z P(>|z|)
-3.09 0.0713 -43.4 0
AIC: 86.80169
The model running time is:
And we look at the estimates.
# Estimates
backTransform(MonthOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)
Estimate SE LinComb (Intercept)
0.818 0.116 1.5 1
Transformation: logistic
backTransform(MonthOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)
Estimate SE LinComb (Intercept)
0.0456 0.00325 -3.09 1
Transformation: exp
# 95% Confidence intervals
plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal'))
0.025 0.975
psi(Int) 0.49299 0.9542132
# BUG FIX: lambda is estimated on the log scale, so its CI must be
# back-transformed with exp(), not plogis() (cf. "Transformation: exp").
exp(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal'))
0.025 0.975
lambda(Int) 0.03811187 0.04978027
# 50% Confidence intervals
plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.7265227 0.8840954
# BUG FIX: lambda is on the log scale; back-transform with exp(), not plogis().
exp(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
0.25 0.75
lambda(Int) 0.0416153 0.04562218
For the comparison, we store those results in a dataframe.
# Store the COP / monthly results. CIs are computed once per parameter.
psi_ci95 <- plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal'))
psi_ci50 <- plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
# BUG FIX: lambda is on the log scale, so ALL its CI bounds are
# back-transformed with exp(); the 50% bounds previously used plogis().
lambda_ci95 <- exp(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal'))
lambda_ci50 <- exp(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Month",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(MonthOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(MonthOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(MonthOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_ci95[1],
  "psi_CI95upper" = psi_ci95[2],
  "psi_CI50lower" = psi_ci50[1],
  "psi_CI50upper" = psi_ci50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(MonthOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(MonthOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(MonthOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_ci95[1],
  "lambda_CI95upper" = lambda_ci95[2],
  "lambda_CI50lower" = lambda_ci50[1],
  "lambda_CI50upper" = lambda_ci50[2]
))
We create an unmarkedFrame object.
# Build the COP unmarkedFrame for the weekly discretisation:
# every session has a fixed length of 7 days.
umf <- unmarkedFrameOccuCOP(
  y = as.matrix(WeekCountMatrix),
  L = matrix(
    7,
    nrow = nrow(WeekCountMatrix),
    ncol = ncol(WeekCountMatrix),
    dimnames = dimnames(WeekCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object
11 sites
Maximum number of sampling occasions per site: 125
Mean number of sampling occasions per site: 69.09
Sites with at least one detection: 9
Tabulation of y observations:
0 1 2 3 4 5 6 8 14 <NA>
649 62 35 3 7 1 1 1 1 615
Tabulation of sampling occasions length:
7
1375
We calculate intuitive starting points for likelihood maximisation.
For the occupancy probability, we once again use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818
For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.
# Starting value for the daily detection rate: mean number of detections
# per day of effort, over sites with at least one detection.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = TRUE))
[1] 0.0457324
We run the model.
beforetime = Sys.time()
WeekOccuCOPMod <- occuCOP(
data = umf,
psiformula = ~ 1,
lambdaformula = ~ 1,
method = "Nelder-Mead",
psistarts = qlogis(psi_init),
lambdastarts = log(lambda_init)
)
aftertime = Sys.time()
print(WeekOccuCOPMod)
Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init),
lambdastarts = log(lambda_init), method = "Nelder-Mead")
Occupancy probability:
Estimate SE z P(>|z|)
1.5 0.782 1.92 0.0544
Detection rate:
Estimate SE z P(>|z|)
-3.08 0.0704 -43.8 0
AIC: 88.1427
The model running time is:
And we look at the estimates.
# Estimates
backTransform(WeekOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)
Estimate SE LinComb (Intercept)
0.818 0.116 1.5 1
Transformation: logistic
backTransform(WeekOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)
Estimate SE LinComb (Intercept)
0.0457 0.00322 -3.08 1
Transformation: exp
# 95% Confidence intervals
plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal'))
0.025 0.975
psi(Int) 0.492971 0.9541862
# BUG FIX: lambda is on the log scale; back-transform with exp(), not plogis().
exp(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal'))
0.025 0.975
lambda(Int) 0.03831473 0.04987641
# 50% Confidence intervals
plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.7264723 0.8840503
# BUG FIX: lambda is on the log scale; back-transform with exp(), not plogis().
exp(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
0.25 0.75
lambda(Int) 0.04179016 0.04576062
For the comparison, we store those results in a dataframe.
# Store the COP / weekly results. CIs are computed once per parameter.
psi_ci95 <- plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal'))
psi_ci50 <- plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
# BUG FIX: lambda is on the log scale, so ALL its CI bounds are
# back-transformed with exp(); the 50% bounds previously used plogis().
lambda_ci95 <- exp(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal'))
lambda_ci50 <- exp(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Week",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(WeekOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(WeekOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(WeekOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_ci95[1],
  "psi_CI95upper" = psi_ci95[2],
  "psi_CI50lower" = psi_ci50[1],
  "psi_CI50upper" = psi_ci50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(WeekOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(WeekOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(WeekOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_ci95[1],
  "lambda_CI95upper" = lambda_ci95[2],
  "lambda_CI50lower" = lambda_ci50[1],
  "lambda_CI50upper" = lambda_ci50[2]
))
We create an unmarkedFrame object.
# Build the COP unmarkedFrame for the daily discretisation:
# every session has a fixed length of 1 day.
umf <- unmarkedFrameOccuCOP(
  y = as.matrix(DayCountMatrix),
  L = matrix(
    1,
    nrow = nrow(DayCountMatrix),
    ncol = ncol(DayCountMatrix),
    dimnames = dimnames(DayCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object
11 sites
Maximum number of sampling occasions per site: 881
Mean number of sampling occasions per site: 489.64
Sites with at least one detection: 9
Tabulation of y observations:
0 1 2 3 4 6 8 <NA>
5239 110 28 5 2 1 1 4305
Tabulation of sampling occasions length:
1
9691
We calculate intuitive starting points for likelihood maximisation.
For the occupancy probability, we once again use the ratio of sites with at least one detection.
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818
For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.
# Starting value for the daily detection rate: mean number of detections
# per day of effort, over sites with at least one detection.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = TRUE))
[1] 0.04540371
We run the model.
beforetime = Sys.time()
DayOccuCOPMod <- occuCOP(
data = umf,
psiformula = ~ 1,
lambdaformula = ~ 1,
method = "Nelder-Mead",
psistarts = qlogis(psi_init),
lambdastarts = log(lambda_init)
)
aftertime = Sys.time()
print(DayOccuCOPMod)
Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init),
lambdastarts = log(lambda_init), method = "Nelder-Mead")
Occupancy probability:
Estimate SE z P(>|z|)
1.5 0.782 1.92 0.0544
Detection rate:
Estimate SE z P(>|z|)
-3.09 0.0702 -44.1 0
AIC: 88.21179
The model running time is:
And we look at the estimates.
# Estimates
backTransform(DayOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)
Estimate SE LinComb (Intercept)
0.818 0.116 1.5 1
Transformation: logistic
backTransform(DayOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)
Estimate SE LinComb (Intercept)
0.0454 0.00319 -3.09 1
Transformation: exp
# 95% Confidence intervals
plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal'))
0.025 0.975
psi(Int) 0.4929715 0.9541861
# BUG FIX: lambda is on the log scale; back-transform with exp(), not plogis().
exp(confint(DayOccuCOPMod, type = 'lambda', method = 'normal'))
0.025 0.975
lambda(Int) 0.03806229 0.04951968
# 50% Confidence intervals
plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
0.25 0.75
psi(Int) 0.7264724 0.8840502
# BUG FIX: lambda is on the log scale; back-transform with exp(), not plogis().
exp(confint(DayOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
0.25 0.75
lambda(Int) 0.04150693 0.0454416
For the comparison, we store those results in a dataframe.
# Store the COP / daily results. CIs are computed once per parameter.
psi_ci95 <- plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal'))
psi_ci50 <- plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
# BUG FIX: lambda is on the log scale, so ALL its CI bounds are
# back-transformed with exp(); the 50% bounds previously used plogis().
lambda_ci95 <- exp(confint(DayOccuCOPMod, type = 'lambda', method = 'normal'))
lambda_ci50 <- exp(confint(DayOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Day",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(DayOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(DayOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(DayOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_ci95[1],
  "psi_CI95upper" = psi_ci95[2],
  "psi_CI50lower" = psi_ci50[1],
  "psi_CI50upper" = psi_ci50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(DayOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(DayOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(DayOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_ci95[1],
  "lambda_CI95upper" = lambda_ci95[2],
  "lambda_CI50lower" = lambda_ci50[1],
  "lambda_CI50upper" = lambda_ci50[2]
))
We calculate intuitive starting points for likelihood maximisation.
# Number of detections per site. vapply() instead of sapply() guarantees
# an integer vector even for degenerate input (length() returns integer).
NbDetecPerSite <- vapply(detection_times, function(x) length(x[[1]]), integer(1))
# Duration of the monitoring period per site
MonitoringDurationPerSite <- unlist(list_T_ij)
For the occupancy probability, we once again use the ratio of sites with at least one detection.
(psi_init <- mean(NbDetecPerSite > 0))
[1] 0.8181818
For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the duration of the monitoring period.
(lambda_init <- mean((NbDetecPerSite / MonitoringDurationPerSite)[NbDetecPerSite > 0]))
[1] 0.04315439
We run the model.
# Fit the PP model by direct minimisation of its negative log-likelihood,
# timing the optimisation.
beforetime <- Sys.time()
fitted_PP <- optim(
  # Initial parameters (transformed scales: logit for psi, log for lambda)
  par = c(
    'psi' = logit(psi_init),
    'lambda' = log(lambda_init)
  ),
  # Function to optimise
  fn = get_PP_neg_loglikelihood,
  # Optimiser settings
  method = "Nelder-Mead",
  # Data forwarded to get_PP_neg_loglikelihood
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = TRUE  # keep the Hessian for Wald standard errors
)
aftertime <- Sys.time()
print(fitted_PP)
$par
psi lambda
1.505713 -3.093956
$value
[1] 836.3008
$counts
function gradient
35 NA
$convergence
[1] 0
$message
NULL
$hessian
psi lambda
psi 1.6346218104 4.588117e-04
lambda 0.0004588117 2.030074e+02
The model running time is:
We calculate the 95% confidence interval.
# Invert the Hessian to obtain the variance-covariance matrix of the
# transformed parameters (observed Fisher information).
fisher_info <- solve(fitted_PP$hessian)
variances <- diag(fisher_info)
if (any(variances < 0)) {
  # A negative variance signals a non-positive-definite Hessian; take the
  # complex square root so the problem stays visible downstream.
  se <- sqrt(variances + 0i)
} else {
  se <- sqrt(variances)
}
# Wald confidence intervals on the transformed scale
# (logit for psi, log for lambda).
upper95CI_fitted_PP <- fitted_PP$par + qnorm(0.975) * se
lower95CI_fitted_PP <- fitted_PP$par - qnorm(0.975) * se
upper50CI_fitted_PP <- fitted_PP$par + qnorm(0.75) * se
lower50CI_fitted_PP <- fitted_PP$par - qnorm(0.75) * se
And we look at our estimates and add them to our result dataframe.
# Assemble the PP estimates into one comparison row.
# psi is on the logit scale (back-transform with plogis);
# lambda is on the log scale (back-transform with exp).
resDF <- data.frame(
  "Model" = "PP",
  "Discretisation" = "No discretisation",
  # psi
  "psi_TransformedPointEstimate" = unname(fitted_PP$par['psi']),
  "psi_TransformedSE" = unname(se["psi"]),
  "psi_PointEstimate" = plogis(unname(fitted_PP$par['psi'])),
  "psi_CI95lower" = plogis(unname(lower95CI_fitted_PP['psi'])),
  "psi_CI95upper" = plogis(unname(upper95CI_fitted_PP['psi'])),
  "psi_CI50lower" = plogis(unname(lower50CI_fitted_PP['psi'])),
  "psi_CI50upper" = plogis(unname(upper50CI_fitted_PP['psi'])),
  # lambda
  "lambda_TransformedPointEstimate" = unname(fitted_PP$par['lambda']),
  "lambda_TransformedSE" = unname(se["lambda"]),
  "lambda_PointEstimate" = exp(unname(fitted_PP$par['lambda'])),
  "lambda_CI95lower" = exp(unname(lower95CI_fitted_PP['lambda'])),
  "lambda_CI95upper" = exp(unname(upper95CI_fitted_PP['lambda'])),
  # BUG FIX: the 50% bounds previously used plogis(); lambda is on the
  # log scale, so exp() is the correct back-transformation (as for the
  # 95% bounds just above).
  "lambda_CI50lower" = exp(unname(lower50CI_fitted_PP['lambda'])),
  "lambda_CI50upper" = exp(unname(upper50CI_fitted_PP['lambda']))
)
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: PP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50571256852469
psi_TransformedSE: 0.782152351788616
psi_PointEstimate: 0.818424940325439
psi_CI95lower: 0.493180954971223
psi_CI95upper: 0.954292289691418
psi_CI50lower: 0.726742734086482
psi_CI50upper: 0.88424582170101
lambda_TransformedPointEstimate: -3.09395583060835
lambda_TransformedSE: 0.0701849568579153
lambda_PointEstimate: 0.0453223119325056
lambda_CI95lower: 0.0394975822303617
lambda_CI95upper: 0.0520060176576667
lambda_CI50lower: 0.0414356583550963
lambda_CI50upper: 0.0453637609703149
# Append the PP results to the model-comparison table.
ModelComparisonDF <- bind_rows(ModelComparisonDF, resDF)
We calculate intuitive starting points for likelihood maximisation.
# Number of detections per site. vapply() instead of sapply() guarantees
# an integer vector even for degenerate input (length() returns integer).
NbDetecPerSite <- vapply(detection_times, function(x) length(x[[1]]), integer(1))
# Duration of the monitoring period per site
MonitoringDurationPerSite <- unlist(list_T_ij)
For the occupancy probability, we once again use the ratio of sites with at least one detection.
(psi_init <- mean(NbDetecPerSite > 0))
[1] 0.8181818
For the switching rates (\(\mu_{12}\), \(\mu_{21}\)), starting points are harder to estimate, although it could be done through a visual analysis for example. We will choose starting points at 1 (i.e. 1 switch per day on average).
# Starting values for the switching rates: one switch per day on average.
mu_12_init <- 1
mu_21_init <- 1
For the daily detection rate \(\lambda_2\), we focus on sites in which there was at least one detection and calculate the average number of detections in state 2. To get this information, we need to divide the average number of detections per day by the time spent in state 2. With the IPP, we only have detections when we are in state 2. Because we chose \(\mu_{12}=\mu_{21}\) for our initial parameters, the system is in state 2 for half the time of the deployment, so even if this is probably not the best starting point, we will keep this logic.
(lambda2_init <- mean((NbDetecPerSite / MonitoringDurationPerSite)[NbDetecPerSite > 0])/2)
[1] 0.0215772
We run the model.
# Fit the IPP model by direct minimisation of its negative log-likelihood,
# timing the optimisation.
beforetime <- Sys.time()
fitted_IPP <- optim(
  # Initial parameters (logit scale for psi, log scale for the rates)
  par = c(
    'psi' = logit(psi_init),
    'lambda_2' = log(lambda2_init),
    'mu_12' = log(mu_12_init),
    'mu_21' = log(mu_21_init)
  ),
  # Function to optimise
  fn = get_IPP_neg_loglikelihood,
  # Optimiser settings
  method = "Nelder-Mead",
  # Data forwarded to get_IPP_neg_loglikelihood
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = TRUE  # keep the Hessian for Wald standard errors
)
aftertime <- Sys.time()
print(fitted_IPP)
$par
psi lambda_2 mu_12 mu_21
1.502027 2.654741 -1.819536 3.926011
$value
[1] 656.1936
$counts
function gradient
237 NA
$convergence
[1] 0
$message
NULL
$hessian
psi lambda_2 mu_12 mu_21
psi 1.6384814217 1.118394e-04 1.427338e-04 -1.115410e-04
lambda_2 0.0001118394 1.333160e+02 1.232471e+02 -1.260003e+02
mu_12 0.0001427338 1.232471e+02 1.572448e+02 -1.257209e+02
mu_21 -0.0001115410 -1.260003e+02 -1.257209e+02 1.473917e+02
The model running time is:
We calculate the 95% confidence interval.
# Invert the Hessian to obtain the variance-covariance matrix of the
# transformed parameters (observed Fisher information).
fisher_info <- solve(fitted_IPP$hessian)
variances <- diag(fisher_info)
if (any(variances < 0)) {
  # A negative variance signals a non-positive-definite Hessian; take the
  # complex square root so the problem stays visible downstream.
  se <- sqrt(variances + 0i)
} else {
  se <- sqrt(variances)
}
# Wald confidence intervals on the transformed scale
# (logit for psi, log for the rates).
upper95CI_fitted_IPP <- fitted_IPP$par + qnorm(0.975) * se
lower95CI_fitted_IPP <- fitted_IPP$par - qnorm(0.975) * se
upper50CI_fitted_IPP <- fitted_IPP$par + qnorm(0.75) * se
lower50CI_fitted_IPP <- fitted_IPP$par - qnorm(0.75) * se
And we look at our estimates and add them to our result dataframe.
# Assemble the IPP estimates into one comparison row. psi is on the logit
# scale (plogis back-transform); lambda_2, mu_12 and mu_21 are on the log
# scale (exp back-transform).
est <- fitted_IPP$par
lo95 <- lower95CI_fitted_IPP
hi95 <- upper95CI_fitted_IPP
lo50 <- lower50CI_fitted_IPP
hi50 <- upper50CI_fitted_IPP
resDF <- data.frame(
  "Model" = "IPP",
  "Discretisation" = "No discretisation",
  # psi
  "psi_TransformedPointEstimate" = unname(est["psi"]),
  "psi_TransformedSE" = unname(se["psi"]),
  "psi_PointEstimate" = plogis(unname(est["psi"])),
  "psi_CI95lower" = plogis(unname(lo95["psi"])),
  "psi_CI95upper" = plogis(unname(hi95["psi"])),
  "psi_CI50lower" = plogis(unname(lo50["psi"])),
  "psi_CI50upper" = plogis(unname(hi50["psi"])),
  # lambda2
  "lambda2_TransformedPointEstimate" = unname(est["lambda_2"]),
  "lambda2_TransformedSE" = unname(se["lambda_2"]),
  "lambda2_PointEstimate" = exp(unname(est["lambda_2"])),
  "lambda2_CI95lower" = exp(unname(lo95["lambda_2"])),
  "lambda2_CI95upper" = exp(unname(hi95["lambda_2"])),
  "lambda2_CI50lower" = exp(unname(lo50["lambda_2"])),
  "lambda2_CI50upper" = exp(unname(hi50["lambda_2"])),
  # mu12
  "mu12_TransformedPointEstimate" = unname(est["mu_12"]),
  "mu12_TransformedSE" = unname(se["mu_12"]),
  "mu12_PointEstimate" = exp(unname(est["mu_12"])),
  "mu12_CI95lower" = exp(unname(lo95["mu_12"])),
  "mu12_CI95upper" = exp(unname(hi95["mu_12"])),
  "mu12_CI50lower" = exp(unname(lo50["mu_12"])),
  "mu12_CI50upper" = exp(unname(hi50["mu_12"])),
  # mu21
  "mu21_TransformedPointEstimate" = unname(est["mu_21"]),
  "mu21_TransformedSE" = unname(se["mu_21"]),
  "mu21_PointEstimate" = exp(unname(est["mu_21"])),
  "mu21_CI95lower" = exp(unname(lo95["mu_21"])),
  "mu21_CI95upper" = exp(unname(hi95["mu_21"])),
  "mu21_CI50lower" = exp(unname(lo50["mu_21"])),
  "mu21_CI50upper" = exp(unname(hi50["mu_21"]))
)
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: IPP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50202713555221
psi_TransformedSE: 0.781230588320164
psi_PointEstimate: 0.817876621654525
psi_CI95lower: 0.492711346046314
psi_CI95upper: 0.954052135773875
psi_CI50lower: 0.726133895808135
psi_CI50upper: 0.883804232267414
lambda2_TransformedPointEstimate: 2.65474069705417
lambda2_TransformedSE: 0.220176949186189
lambda2_PointEstimate: 14.2212979567295
lambda2_CI95lower: 9.23685820217277
lambda2_CI95upper: 21.8954660932768
lambda2_CI50lower: 12.2586719595043
lambda2_CI50upper: 16.4981423960265
mu12_TransformedPointEstimate: -1.81953578851366
mu12_TransformedSE: 0.157540140021699
mu12_PointEstimate: 0.1621009826089
mu12_CI95lower: 0.119038442826728
mu12_CI95upper: 0.22074153474117
mu12_CI50lower: 0.145759831977842
mu12_CI50upper: 0.180274141416172
mu21_TransformedPointEstimate: 3.92601134673108
mu21_TransformedSE: 0.194862933899288
mu21_PointEstimate: 50.7043317970344
mu21_CI95lower: 34.6080742396373
mu21_CI95upper: 74.2869783849231
mu21_CI50lower: 44.4594836706484
mu21_CI50upper: 57.8263409901236
# Append the IPP results to the model-comparison table.
ModelComparisonDF <- bind_rows(ModelComparisonDF, resDF)
We will use the same starting points as we did with IPP. We just have to add an initial parameter for the detection rate in state 1 \(\lambda_1\). We’ll choose one close to 0.
# Starting value for the state-1 detection rate: close to zero.
lambda1_init <- 0.01
We run the model.
# Fit the 2-MMPP model by direct minimisation of its negative
# log-likelihood, timing the optimisation.
beforetime <- Sys.time()
fitted_2MMPP <- optim(
  # Initial parameters (logit scale for psi, log scale for the rates)
  par = c(
    'psi' = logit(psi_init),
    'lambda_1' = log(lambda1_init),
    'lambda_2' = log(lambda2_init),
    'mu_12' = log(mu_12_init),
    'mu_21' = log(mu_21_init)
  ),
  # Function to optimise
  fn = get_2MMPP_neg_loglikelihood,
  # Optimiser settings
  method = "Nelder-Mead",
  # Data forwarded to get_2MMPP_neg_loglikelihood
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = TRUE  # keep the Hessian for Wald standard errors
)
aftertime <- Sys.time()
print(fitted_2MMPP)
$par
psi lambda_1 lambda_2 mu_12 mu_21
1.504410 -4.782945 2.870407 -2.290886 3.875047
$value
[1] 655.8123
$counts
function gradient
348 NA
$convergence
[1] 0
$message
NULL
$hessian
psi lambda_1 lambda_2 mu_12 mu_21
psi 1.635999e+00 3.372236e-05 8.009238e-05 1.091252e-04 -7.990764e-05
lambda_1 3.372236e-05 1.083773e+01 1.205987e+01 2.614429e+01 -1.361400e+01
lambda_2 8.009238e-05 1.205987e+01 9.463981e+01 7.631121e+01 -8.494856e+01
mu_12 1.091252e-04 2.614429e+01 7.631121e+01 9.443247e+01 -7.730811e+01
mu_21 -7.990764e-05 -1.361400e+01 -8.494856e+01 -7.730811e+01 1.050761e+02
The model running time is:
We calculate the 95% confidence interval.
# Invert the Hessian to get the variance-covariance matrix of the estimates
fisher_info <- solve(fitted_2MMPP$hessian)
variances <- diag(fisher_info)
# A negative diagonal term means the optimum is not a clean maximum;
# casting to complex keeps sqrt() defined instead of producing NaNs
se <- if (any(variances < 0)) sqrt(variances + 0i) else sqrt(variances)
# Wald confidence intervals on the transformed scale
# (logit for psi, log for the rates)
z95 <- qnorm(.975)
z50 <- qnorm(.75)
upper95CI_fitted_2MMPP <- fitted_2MMPP$par + z95 * se
lower95CI_fitted_2MMPP <- fitted_2MMPP$par - z95 * se
upper50CI_fitted_2MMPP <- fitted_2MMPP$par + z50 * se
lower50CI_fitted_2MMPP <- fitted_2MMPP$par - z50 * se
And we look at our estimates and add them to our result dataframe.
# Collect the 2-MMPP estimates in a one-row data frame, back-transformed to the
# natural scale (plogis for psi, which was estimated on the logit scale; exp
# for the rates, estimated on the log scale).
# A small helper builds the seven columns for one parameter, instead of
# repeating the same block five times.
backtransformed_cols <- function(prefix, par_name, inv_link) {
  vals <- list(
    fitted_2MMPP$par[[par_name]],                 # estimate, transformed scale
    se[[par_name]],                               # SE, transformed scale
    inv_link(fitted_2MMPP$par[[par_name]]),       # estimate, natural scale
    inv_link(lower95CI_fitted_2MMPP[[par_name]]), # 95% CI, natural scale
    inv_link(upper95CI_fitted_2MMPP[[par_name]]),
    inv_link(lower50CI_fitted_2MMPP[[par_name]]), # 50% CI, natural scale
    inv_link(upper50CI_fitted_2MMPP[[par_name]])
  )
  names(vals) <- paste0(prefix, "_", c(
    "TransformedPointEstimate", "TransformedSE", "PointEstimate",
    "CI95lower", "CI95upper", "CI50lower", "CI50upper"
  ))
  vals
}
resDF <- do.call(data.frame, c(
  list("Model" = "2-MMPP", "Discretisation" = "No discretisation"),
  backtransformed_cols("psi", "psi", plogis),
  backtransformed_cols("lambda1", "lambda_1", exp),
  backtransformed_cols("lambda2", "lambda_2", exp),
  backtransformed_cols("mu12", "mu_12", exp),
  backtransformed_cols("mu21", "mu_21", exp)
))
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: 2-MMPP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50441036232307
psi_TransformedSE: 0.781822988445301
psi_PointEstimate: 0.818231344999125
psi_CI95lower: 0.493016819768922
psi_CI95upper: 0.954207256666532
psi_CI50lower: 0.726528196092816
psi_CI50upper: 0.8840897043584
lambda1_TransformedPointEstimate: -4.78294476450616
lambda1_TransformedSE: 0.995922385227961
lambda1_PointEstimate: 0.00837131109556312
lambda1_CI95lower: 0.00118867412825875
lambda1_CI95upper: 0.0589554763519203
lambda1_CI50lower: 0.00427622697818252
lambda1_CI50upper: 0.016388009760998
lambda2_TransformedPointEstimate: 2.87040722958402
lambda2_TransformedSE: 0.31936893270487
lambda2_PointEstimate: 17.6442019780465
lambda2_CI95lower: 9.43527781654207
lambda2_CI95upper: 32.9950924069552
lambda2_CI50lower: 14.2249314062358
lambda2_CI50upper: 21.8853683403794
mu12_TransformedPointEstimate: -2.2908855713745
mu12_TransformedSE: 0.544556144954539
mu12_PointEstimate: 0.101176822870786
mu12_CI95lower: 0.0347979778013971
mu12_CI95upper: 0.294176562346545
mu12_CI50lower: 0.0700754255232265
mu12_CI50upper: 0.146081874063446
mu21_TransformedPointEstimate: 3.87504686033604
mu21_TransformedSE: 0.203585516009739
mu21_PointEstimate: 48.1849562014347
mu21_CI95lower: 32.3310019043005
mu21_CI95upper: 71.8131164325391
mu21_CI50lower: 42.0025578029422
mu21_CI50upper: 55.2773479897822
# Append the 2-MMPP results to the running comparison table
ModelComparisonDF <- bind_rows(ModelComparisonDF, resDF)
# Reshape the comparison table to long form: one row per
# (model, discretisation, parameter), keeping only estimated parameters
PublicationDF <- ModelComparisonDF %>%
  pivot_longer(
    cols = starts_with(c("psi", "p", "lambda", "mu")),
    names_to = c("Parameter", "Interval"),
    names_sep = "_"
  ) %>%
  pivot_wider(names_from = "Interval", values_from = "value") %>%
  filter(rowSums(is.na(.)) < 2)
# Display the table, rounded to 3 decimals, with ordered factor columns
PublicationDF %>%
  mutate(
    across(
      c(TransformedPointEstimate, TransformedSE, PointEstimate,
        CI95lower, CI95upper, CI50lower, CI50upper),
      ~ round(.x, 3)
    ),
    Model = factor(Model, levels = c("BP", "COP", "PP", "IPP", "2-MMPP")),
    Discretisation = factor(Discretisation, levels = c("No discretisation", "Day", "Week", "Month")),
    Parameter = as.factor(Parameter)
  ) %>%
  myDT(caption = "Occupancy models comparison on the lynx dataset")
# Occupancy probability estimates for the discrete-time models (BP, COP):
# one facet per model, one x position per discretisation, with 95% and 50%
# confidence intervals drawn as nested segments behind the point estimate.
plotPsiDiscrete <- ModelComparisonDF %>%
  filter(Model %in% c("BP", "COP")) %>%
  mutate(
    # TRUE spelled out instead of T (T is reassignable)
    Model = factor(
      as.character(Model),
      levels = c("BP", "COP"),
      labels = c("BP", "COP"),
      ordered = TRUE
    ),
    # Coarsest discretisation first on the x axis
    Discretisation = factor(as.character(Discretisation), levels = c("Month", "Week", "Day"), ordered = TRUE)
  ) %>%
  ggplot(aes(group = interaction(Model, Discretisation))) +
  geom_segment(
    aes(
      y = psi_CI95lower,
      yend = psi_CI95upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "95% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_segment(
    aes(
      y = psi_CI50lower,
      yend = psi_CI50upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "50% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_point(
    aes(x = Discretisation, y = psi_PointEstimate, fill = "Point estimate"),
    size = 3,
    shape = 23,
    colour = "transparent"
  ) +
  facet_grid(. ~ Model,
             switch = "y") +
  scale_fill_manual(values = c("Point estimate" = "grey5"),
                    name = "") +
  scale_colour_manual(
    values = c(
      "95% confidence interval" = "grey80",
      "50% confidence interval" = "grey65"
    ),
    name = ""
  ) +
  # psi is a probability, so fix the axis to [0, 1]
  ylim(0, 1) +
  theme_minimal(base_size = 15) +
  labs(y = "Estimated occupancy probability") +
  scale_x_discrete(position = "top") +
  theme(
    strip.background.y = element_rect(fill = "gray93", colour = "white"),
    strip.text.y = element_text(colour = "black", face = "bold"),
    strip.background.x = element_rect(fill = "gray80", colour = "white"),
    strip.text.x = element_text(colour = "black", face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = .5),
    axis.text.y = element_text(hjust = .5),
    legend.position = "bottom",
    legend.title = element_text(hjust = 0.5),
    strip.placement = "outside",
    plot.title = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    panel.border = element_rect(colour = "gray80", fill = "transparent"),
    panel.grid.major.x = element_blank()
  )
# Occupancy probability estimates for the continuous-time models
# (PP, IPP, 2-MMPP). Same layout as plotPsiDiscrete, but the y-axis
# labelling is removed because the two panels are arranged side by side
# and share the discrete panel's axis.
plotPsiContinuous <- ModelComparisonDF %>%
  filter(Model %in% c("PP", "IPP", "2-MMPP")) %>%
  mutate(Model = factor(
    as.character(Model),
    levels = c("PP", "IPP", "2-MMPP"),
    labels = c("PP", "IPP", "2-MMPP"),
    # TRUE spelled out instead of T (T is reassignable)
    ordered = TRUE
  )) %>%
  ggplot(aes(group = interaction(Model, Discretisation))) +
  geom_segment(
    aes(
      y = psi_CI95lower,
      yend = psi_CI95upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "95% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_segment(
    aes(
      y = psi_CI50lower,
      yend = psi_CI50upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "50% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_point(
    aes(x = Discretisation, y = psi_PointEstimate, fill = "Point estimate"),
    size = 3,
    shape = 23,
    colour = "transparent"
  ) +
  facet_grid(. ~ Model,
             switch = "y") +
  scale_fill_manual(values = c("Point estimate" = "grey5"),
                    name = "") +
  scale_colour_manual(
    values = c(
      "95% confidence interval" = "grey80",
      "50% confidence interval" = "grey65"
    ),
    name = ""
  ) +
  # psi is a probability, so fix the axis to [0, 1]
  ylim(0, 1) +
  theme_minimal(base_size = 15) +
  labs(y = "Estimated occupancy probability") +
  scale_x_discrete(position = "top") +
  theme(
    strip.background.y = element_rect(fill = "gray93", colour = "white"),
    strip.background.x = element_rect(fill = "gray80", colour = "white"),
    strip.text.x = element_text(colour = "black", face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = .5),
    legend.position = "bottom",
    legend.title = element_text(hjust = 0.5),
    strip.placement = "outside",
    plot.title = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    panel.border = element_rect(colour = "gray80", fill = "transparent"),
    panel.grid.major.x = element_blank(),
    # Remove y axis informations (shared with the discrete panel)
    strip.text.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )
# Combine the discrete- and continuous-time panels side by side,
# sharing a single legend at the bottom
plotPsi <- ggpubr::ggarrange(
  plotPsiDiscrete,
  plotPsiContinuous,
  ncol = 2,
  widths = c(1, 0.7),
  common.legend = TRUE,
  legend = "bottom"
) +
  theme(plot.background = element_rect(fill = "white"))
print(plotPsi)
# Export the figure. Make sure the output directory exists first
# (same guard as used for ./data/ at the top of the document).
if (!dir.exists("./output/")) {
  dir.create("./output/")
}
# Vector version (PDF)
ggsave(
  "./output/lynx_psi.pdf",
  plot = plotPsi,
  width = 25,
  height = 10,
  # "units" spelled out: "unit" only worked through partial matching
  units = "cm"
)
# Raster version (JPEG)
jpeg(
  filename = "./output/lynx_psi.jpeg",
  width = 25,
  height = 10,
  units = "cm",
  res = 100
)
print(plotPsi)
dev.off()
png
2
# Long format of the comparison table: one row per
# (model, discretisation, parameter)
ModelComparisonDFlong <- ModelComparisonDF %>%
  pivot_longer(
    cols = starts_with(c("psi", "p", "lambda", "mu")),
    names_to = c("Parameter", "Interval"),
    names_sep = "_"
  ) %>%
  pivot_wider(names_from = "Interval", values_from = "value")
# All detection-related parameters (everything except occupancy psi)
detecParams <- setdiff(unique(ModelComparisonDFlong$Parameter), "psi")
# One figure per detection parameter, faceted by model, showing the point
# estimate with nested 95% and 50% confidence intervals per discretisation
for (param in detecParams) {
  ggDetec <- ModelComparisonDFlong %>%
    # Keep only the rows where this parameter was actually estimated
    filter(Parameter == param, !is.na(PointEstimate)) %>%
    ggplot(aes(group = interaction(Model, Discretisation))) +
    geom_segment(
      aes(x = Discretisation, xend = Discretisation,
          y = CI95lower, yend = CI95upper,
          colour = "95% confidence interval"),
      linewidth = 2
    ) +
    geom_segment(
      aes(x = Discretisation, xend = Discretisation,
          y = CI50lower, yend = CI50upper,
          colour = "50% confidence interval"),
      linewidth = 2
    ) +
    geom_point(
      aes(x = Discretisation, y = PointEstimate, fill = "Point estimate"),
      size = 3, shape = 23, colour = "transparent"
    ) +
    facet_grid(. ~ Model, switch = "y", scales = "free") +
    scale_fill_manual(values = c("Point estimate" = "grey5"), name = "") +
    scale_colour_manual(
      values = c("95% confidence interval" = "grey80",
                 "50% confidence interval" = "grey65"),
      name = ""
    ) +
    theme_minimal(base_size = 15) +
    labs(y = paste("Estimated", param),
         title = paste("Detection parameter:", param)) +
    scale_x_discrete(position = "top") +
    theme(
      strip.background.y = element_rect(fill = "gray93", colour = "white"),
      strip.text.y = element_text(colour = "black", face = "bold"),
      strip.background.x = element_rect(fill = "gray80", colour = "white"),
      strip.text.x = element_text(colour = "black", face = "bold"),
      axis.title.x = element_blank(),
      axis.text.x = element_text(hjust = .5),
      axis.text.y = element_text(hjust = .5),
      legend.position = "bottom",
      legend.title = element_text(hjust = 0.5),
      strip.placement = "outside",
      plot.title = element_text(hjust = 0.5, face = "bold"),
      plot.margin = unit(c(0, 0, 0, 0), "cm"),
      panel.border = element_rect(colour = "gray80", fill = "transparent"),
      panel.grid.major.x = element_blank()
    )
  print(ggDetec)
}